30DayChartChallenge

Author

Maelle Boutraud

Published

July 1, 2024

https://www.cedricscherer.com/2024/04/01/contributions-30daychartchallenge-2024/

https://www.cedricscherer.com/2024/04/01/contributions-30daychartchallenge-2024/

Here is my project of the 30 Day Chart Challenge using R while trying to bring good data and understand better about visualization and the different graphs you can use… Come learn with me and watch my evolution creating graphs !

Setup of package installation :

Show the code
if (!"librarian" %in% installed.packages())
install.packages("librarian")
librarian::shelf(ggplot2, zmeers / ggparliament, tidyr, viridis, patchwork, dplyr, readxl, plotly, rworldmap)

Day 1 : Part to Whole

Show the code
library(ggparliament)
library(tidyverse)

seats <- c(51, 188, 52, 68, 219, 73, 42, 35, 23)

names_party <- c("The European United Left/Nordic Green Left",
                 "Progressive Alliance of Socialists and Democrats",
                 "Greens/European Free Alliance",
                 "Alliance of Liberals and Democrats for Europe",
                 "The European Peoples Party",
                 "European Conservatives and Reformists",
                 "Europe of Freedom and Direct Democracy",
                 "Europe of Nations and Freedom",
                 "Non-attached members")

# Colours of the different party using https://htmlcolorcodes.com/fr/
party_colours <- c("#C0392B", "#E74C3C", "#52BE80", "#F4D03F", "#21618C", "#3498DB", "#984EA3", "#707B7C", "#999999")

# Abbreviated column for the party
party <- c("GUE/NGL", "S&D", "Greens/EFA", "ALDE", "EPP", "ECR", "EFDD", "ENF", "NA")

# Creation of the data frame
europeen <- data.frame(
  party = party,
  seats = seats,
  mycol = party_colours
)

# Creation of data for the semicircle plot
europeen_semicircle <- parliament_data(election_data = europeen, type = "semicircle", parl_rows = 14, party_seats = europeen$seats)

# Creation of plot
ggplot(europeen_semicircle, aes(x = x, y = y, colour = party)) +
  geom_parliament_seats(size = 2.5)  +
  theme_ggparliament() +
  labs(title = "Europeen Parlement, 2024") +
  scale_colour_manual(values = party_colours, 
                      limits = party)

Day 2 : Neo

Show the code
library(ggplot2)


Naissance <- data.frame(
  Country = c("European Union", "Austria", "Belgium", "Bulgaria", "Croatia",
              "Cyprus", "Czech Republic", "Denmark","Estonia", "Finland",
              "France", "Germany", "Greece", "Hungary", "Ireland", "Italy", 
              "Latvia", "Lithuania", "Luxembourg", "Netherlands", "Poland",
              "Portugal", "Romania", "Slovakia", "Slovenia", "Spain", 
              "Sweden", "Norway", "Russia", "Switzerland", "United Kingdom", 
              "Australia", "Canada", "Chile", "Colombia",
              "Israel", "Japan", "Mexico", "New-Zealand", "South Korea",
              "Turkey", "United States"),
  Birth_rate_2022 = c(8.7, 9.1, 9.8, 8.8, 8.8, 11.1, 9.5, 9.9, 8.6, 8.1, 10.6,
                      8.8, 7.3, 9.3, 11.2, 6.7, 8.5, 7.8, 9.9,
                      9.5, 8.3, 8.0, 9.6, 9.7, 8.3, 6.9, 10.0, 9.4, 9.6, 9.4,
                      10.7, 11.5, 9.8, 11.8, 14.2, 19.6, 6.6, 
                      14.9, 12.4, 5.6, 14.7, 11.1)
  )

# Creation of plot 
ggplot(Naissance, aes(x = reorder(Country, Birth_rate_2022), y = Birth_rate_2022, fill = Birth_rate_2022)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  scale_fill_gradient(low = "skyblue", high = "darkblue") +
  labs(title = "Birth rate by Country in 2022", x = "Country", y = "Birth rate") +
  theme_minimal()

Day 3 : Makeover

First plot for makeover

Show the code
library(ggplot2)
library(dplyr)
library(tidyr)

GHG <- data.frame( Sectors = c("Energy industry", "Manufacturing industry",
"Waste treatment", "Residential activities",
"Agriculture", "Road transport", "Other transport", "Total"),
  
  `2017` = c(57.3, 82.9, 15.1, 84.1, 81.3, 130, 8, 459),
  `2018` = c(48, 82.8, 14.8, 79, 80.4, 127, 8.1, 440),
  `2019` = c(46.2, 79.8, 16, 76, 78.6, 126, 8.2, 431),
  `2020` = c(41.3, 72, 15.9, 71.3, 78.2, 107, 6.2, 392),
  `2021` = c(42.5, 78, 15.2, 75.1, 76.5, 120, 7.1, 415),
  `2022` = c(44.6, 73, 15.2, 64, 76.5, 122, 8.1, 404))

GHG_long <- GHG %>%
  pivot_longer(
    cols = starts_with("X"),
    names_to = "Years",
    values_to = "Emissions"
    )

neon_colors <- c("#AEC6CF", "#FFB347", "#77DD77", "#FF6961", "#CFCFC4", "#F49AC2", "#FFD1DC")

# Creation of plot
ggplot(GHG_long, aes(x = Emissions, y = reorder(Sectors, Emissions), fill = Years)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = neon_colors, name = "Years") +
  theme_minimal() +
  labs(
    title = "Greenhouse gas emissions by sectors", 
    subtitle = "(In France)",
    x = "Emissions (Mt CO2 equivalent)",
    y = "Sectors/ Activity"
    )

Second Plot - using stacked bars

Show the code
# Load required packages
library(ggplot2)
library(dplyr)
library(tidyr)

# Data definition
GHG <- data.frame( 
  Sectors = c("Energy industry", "Manufacturing industry",
             "Waste treatment", "Residential activities",
             "Agriculture", "Other transport", "Road transport"),

`2017` = c(57.3, 82.9, 15.1, 84.1, 81.3, 8, 130),
`2018` = c(48, 82.8, 14.8, 79, 80.4, 8.1, 127),
`2019` = c(46.2, 79.8, 16, 76, 78.6, 8.2, 126),
`2020` = c(41.3, 72, 15.9, 71.3, 78.2, 6.2, 107),
`2021` = c(42.5, 78, 15.2, 75.1, 76.5, 7.1, 120),
`2022` = c(44.6, 73, 15.2, 64, 76.5, 8.1, 122)
)
# Pivoter les données pour avoir une colonne Annee, Secteur, et Emissions
GHG_long <- GHG %>%
pivot_longer(cols = -Sectors, names_to = "Years", values_to = "Emissions")

# Personalized colours for each activity or sector
sector_colors <- c("#AEC6CF", "#FFB347", "#77DD77", "#FF6961", "#CFCFC4", "#F49AC2", "#FFD1DC")

# Creation of plot
ggplot(GHG_long, aes(x = Years, y = Emissions, fill = Sectors)) +
geom_bar(stat = "identity", position = "stack") +
scale_fill_manual(values = sector_colors) +
labs(
  title = "Greenhouse gas emissions by sectors",
  x = "Years",
  y = "Emissions (Mt CO2 equivalent)") +
  theme_minimal()

This shows the importance of choosing the right graph for certain data for better visualization.

Day 4 : Waffle

Show the code
library(waffle)
library(ggplot2)

Energie <- c("Renewable", "Hydropower", "Nuclear", "Natural Gas", "Coal", "Oil")
stats <- c(10.8, 15.7, 10.7, 23.5, 36.7, 2.8)
yey <- data.frame(Energie = Energie, stats = stats)

colors <- c("#66c2a5", "#fc8d62", "#8da0cb", "#e78ac3", "#a6d854", "#ffd92f")

parts <- setNames(yey$stats, yey$Energie)

waffle_chart <- waffle(
parts = parts,          
rows = 10,            
size = 0.5,            
colors = colors         
) +
ggtitle("Energy used to generate electricity worldwide") +  
labs(caption = "Source: Hello Watt") +  
theme(
plot.title = element_text(size = 12, face = "bold", hjust = 0.6),
legend.title = element_blank(),
legend.text = element_text(size = 12),
legend.position = "bottom",
plot.caption = element_text(size = 10, face = "italic"),
plot.title.position = "plot",  
plot.margin = margin(10, 10, 10, 10)
)

print(waffle_chart)

Day 5 : Diverging

For the time being, this is from a GitHub example using the networkD3 package. Reference

This graph shows the migration from one country (left) to another (right).

Show the code
# Libraries
library(tidyverse)
library(viridis)
library(patchwork)
library(hrbrthemes)
library(circlize)

# Load dataset from github, 
# let's give it a better name than data.

immidata <- read.table("https://raw.githubusercontent.com/holtzy/data_to_viz/master/Example_dataset/13_AdjacencyDirectedWeighted.csv", header=TRUE)
# Package we need here 
library(networkD3)

# We need a long format
immidata_long <- immidata %>%
  rownames_to_column %>%
  gather(key = 'key', value = 'value', -rowname) %>%
  filter(value > 0)
colnames(immidata_long) <- c("source", "target", "value")
immidata_long$target <- paste(immidata_long$target, " ", sep="")

# From these flows we need to create a node data frame: it lists every entities involved in the flow
nodes <- data.frame(name=c(as.character(immidata_long$source), as.character(immidata_long$target)) %>% unique())

# With networkD3, connection must be provided using id, not using real name like in the links dataframe.. So we need to reformat it.
immidata_long$IDsource=match(immidata_long$source, nodes$name)-1
immidata_long$IDtarget=match(immidata_long$target, nodes$name)-1

# prepare colour scale
ColourScal ='d3.scaleOrdinal() .range(["#FDE725FF","#B4DE2CFF","#6DCD59FF","#35B779FF","#1F9E89FF","#26828EFF","#31688EFF","#3E4A89FF","#482878FF","#440154FF"])'

# Make the Network
sankeyNetwork(Links = immidata_long, Nodes = nodes,
              Source = "IDsource", Target = "IDtarget",
              Value = "value", NodeID = "name",
              sinksRight=FALSE, colourScale=ColourScal, nodeWidth=40, fontSize=13, nodePadding=20)
Show the code
#https://www.data-to-viz.com/graph/sankey.html

Day 6 : OECD

Show the code
library(readxl)
library(ggplot2)
library(tidyverse)
library(plotly)

OECD <- read_excel("../Rwork/30DayChartChallenge/OECDmigration.xlsx")

# Renommer la colonne et filtrer les lignes non nulles
migration_data <- rename(OECD, Country = 1)
migration_data <- filter(migration_data, !is.na(Country))

# Convertir les données en format long
migration_long <- pivot_longer(migration_data, -Country, names_to = "Year", values_to = "Value")

# Convertir les colonnes Year et Value en numériques
migration_long$Year <- as.numeric(migration_long$Year)
migration_long$Value <- as.numeric(gsub(",", "", migration_long$Value))

# Créer le graphique avec ggplot2
p <- ggplot(migration_long, aes(x = Year, y = Value, color = Country, group = Country, text = paste("Country:", Country, "<br>Year:", Year, "<br>Value:", Value))) +
geom_line() +
theme_minimal() +
labs(title = "Chronological Chart of International migration",
x = "Year",
y = "People") +
scale_x_continuous(breaks = seq(min(migration_long$Year), max(migration_long$Year), by = 1)) +  # Forcer l'affichage de toutes les années
theme(legend.position = "none")  # Suppression de la légende

#Convertir le graphique ggplot2 en graphique plotly interactif
p_interactive <- ggplotly(p, tooltip = "text")

# Afficher le graphique interactif
p_interactive
Show the code
library(readxl)
library(ggplot2)
library(tidyverse)
library(plotly)

OECD2 <- read_excel("../Rwork/30DayChartChallenge/OECD2.xlsx")

# Renommer la colonne et filtrer les lignes non nulles
migration_data <- rename(OECD2, Country = 1)
migration_data <- filter(migration_data, !is.na(Country))

# Convertir les données en format long
migration_long <- pivot_longer(migration_data, -Country, names_to = "Year", values_to = "Value")

# Convertir les colonnes Year et Value en numériques
migration_long$Year <- as.numeric(migration_long$Year)
migration_long$Value <- as.numeric(gsub(",", "", migration_long$Value))

# Créer le graphique avec ggplot2
p <- ggplot(migration_long, aes(x = Year, y = Value, color = Country, group = Country, text = paste("Country:", Country, "<br>Year:", Year, "<br>Value:", Value))) +
  geom_line() +
  theme_minimal() +
  labs(title = "Chronological Chart of International migration",
       x = "Year",
       y = "Proportion of migrants on the total population") +
  scale_x_continuous(breaks = seq(min(migration_long$Year), max(migration_long$Year), by = 1)) +  # Forcer l'affichage de toutes les années
  theme(legend.position = "none")  # Suppression de la légende

#Convertir le graphique ggplot2 en graphique plotly interactif
p_interactive <- ggplotly(p, tooltip = "text")

# Afficher le graphique interactif
p_interactive

Day 7 : Hazards

Show the code
library(rworldmap)
library(readr)

# Data settings
wildfires <- read_csv("../Rwork/30DayChartChallenge/wildfires.csv")
wildfires2023_na <-wildfires[wildfires$Year==2023,]
wildfires2023 <- wildfires2023_na[complete.cases(wildfires2023_na), ]


colnames(wildfires2023)[4]<-"AreaBurnt"
wildfires2023$logArea<-log(wildfires2023$AreaBurnt+1)
wildfires23<-wildfires2023[-which(wildfires2023$logArea<0.1),]

colour_fire = c("#FFFF00","#FFD700","#FFA500","#FF8C00","#E66608","#D14209","#C40A0A","#8E0B0B")

# Join wildfire data with spatial data
sPDF <- joinCountryData2Map(wildfires23,
                            , joinCode = "ISO3"
                            , nameJoinColumn = "Code")
Show the code
# Create map
mapCountryData(sPDF,nameColumnToPlot='logArea', 
               colourPalette= colour_fire, 
               catMethod="pretty", 
               mapTitle = "Wilfires in the World 2023",
               xlim = NA,
               ylim = NA,
               oceanCol = NA,
               lwd = 0.5, 
               addLegend = TRUE)

Data from : https://ourworldindata.org/wildfires

Day 8 : Circular

Show the code
library(ggplot2)
library(tidyr)
library(dplyr)
# Data preparation
temperature <- data.frame(
Année = c("2024", "2023", "2022", "2021", "2020", "2019", "2018", "2017", "2016", "2015", "2014"),
Moyenne = c(6.1, 14.4, 14.5, 12.9, 14.1, 13.7, 13.9, 13.4, 13.1, 13.6, 13.8),
Janvier = c(9.6, 6.3, 5.0, 4.9, 7.1, 4.6, 8.4, 3.1, 7.1, 5.6, 7.6),
Février = c(10.6, 6.9, 8.1, 8.2, 9.3, 8.0, 3.5, 8.2, 7.3, 4.9, 7.9),
Mars = c(12.4, 10.2, 9.9, 8.9, 9.4, 10.1, 8.2, 11.0, 8.0, 9.1, 9.8),
Avril = c(15.5, 11.8, 11.8, 10.4, 14.1, 11.7, 13.8, 11.4, 11.1, 12.6, 13.0),
Mai = c(NA, 16.2, 17.8, 13.8, 16.5, 13.9, 16.3, 16.5, 15.0, 15.5, 14.7),
Juin = c(NA, 21.5, 21.2, 20.3, 18.6, 20.1, 20.1, 21.2, 18.7, 19.8, 19.6),
Juillet = c(NA, 21.9, 23.2, 20.7, 21.6, 23.0, 23.2, 21.7, 21.3, 22.8, 20.6),
Août = c(NA, 22.0, 23.7, 20.0, 22.6, 21.8, 22.3, 21.5, 21.5, 21.6, 19.1),
Septembre = c(NA, 21.1, 18.2, 19.3, 19.0, 18.5, 19.0, 16.4, 19.7, 16.4, 18.9),
Octobre = c(NA, 16.4, 17.2, 13.5, 13.0, 15.1, 14.4, 14.9, 12.6, 12.7, 15.9),
Novembre = c(NA, 10.1, 10.9, 7.9, 10.5, 8.8, 9.6, 8.2, 8.8, 11.2, 11.3),
Décembre = c(NA, 8.0, 6.7, 6.9, 6.9, 8.1, 7.7, 5.9, 5.6, 9.5, 6.3)
)
# Reshape data to long format
temperature_long <- temperature %>%
pivot_longer(cols = Janvier:Décembre, names_to = "Month", values_to = "Temperature")
# Ensure Month is a factor with the correct order
temperature_long$Month <- factor(temperature_long$Month, levels = c("Janvier", "Février", "Mars", "Avril", "Mai", "Juin", "Juillet", "Août", "Septembre", "Octobre", "Novembre", "Décembre"))
# Plot the radial histogram
ggplot(temperature_long, aes(x = Month, y = Temperature, group = Année, fill = Année)) +
geom_bar(stat = "identity", position = "dodge", width = 0.8) +
coord_polar() +
theme_minimal() +
labs(title = "Radial Histogram of Monthly Temperatures",
x = "",
y = "Temperature (°C)",
fill = "Year") +
theme(axis.text.x = element_text(size = 8),
axis.title.y = element_text(size = 10))

Day 9 : Major / Minor

Show the code
# Load necessary libraries
library(tidyr)
library(dplyr)
library(ggplot2)

# Simulate reading the Excel file
# temp <- read_excel("./Paris.xlsx")
# Since the actual file is not provided, we'll create the dataframe manually based on the provided structure.

temp <- data.frame(
    Month = c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", "Year"),
    `Record high °C` = c(16.1, 21.4, 26.0, 30.2, 34.8, 37.6, 42.6, 39.5, 36.2, 28.9, 21.6, 17.1, 42.6),
    `Daily mean °C` = c(5.4, 6.0, 9.2, 12.2, 15.6, 18.8, 20.9, 20.8, 17.2, 13.2, 8.7, 5.9, 12.8),
    `Record low °C` = c(-14.6, -14.7, -9.1, -3.5, -0.1, 3.1, 6.0, 6.3, 1.8, -3.8, -14.0, -23.9, -23.9)
)

# Reshape the data to long format
temp_long <- temp %>%
    pivot_longer(cols = -Month, names_to = "Temperature Type", values_to = "Temperature")

# Remove the 'Year' row as it doesn't represent a specific month
temp_long <- temp_long %>%
    filter(Month != "Year")

# Convert Month to a factor with the correct order
temp_long$Month <- factor(temp_long$Month, levels = c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"))

# Plot the data
p <- ggplot(data = temp_long, aes(x = Month, y = Temperature, color = `Temperature Type`, group = `Temperature Type`)) +
    geom_point() +
    geom_smooth(se = FALSE) +
    labs(title = "Temperature Trends in Paris",
         x = "Month",
         y = "Temperature (°C)",
         color = "Temperature Type") +
    theme_minimal()

print(p)

Day 10 : Physical

Show the code
library(ggplot2)
library(plotly)

# Loading data with read_csv()
physical = read_csv("../Rwork/30DayChartChallenge/physical-integrity-rights-index-population-weighted.csv")

# Modify column name to prepare the ploting
colnames(physical)[4] = "index"

# Create plot
plot_physical <- ggplot(data = physical, aes(x = Year, y = index, group = Entity, color = Entity)) +
  geom_line() + 
  labs(title = "Physical integrity rights index",
       x = "Year",
       y = "Physical violence index",
       color = "Country") +
  theme_minimal()

#plot_physical
# Convert plot to plotly to make it interactive
plotly_physical <- ggplotly(plot_physical)

# Print the interactive plot
plotly_physical

Source : https://ourworldindata.org/grapher/physical-integrity-rights-index-population-weighted?time=earliest..2023

Day 11 : Mobile Friendly

Show the code
# Load the library
library(ggplot2)

# Create data for each age group
years <- c(2011, 2016, 2022)
age_1_6 <- c(12.29, 14.15, 14.21)
age_7_12 <- c(19.58, 20.27, 24.41)
age_13_19 <- c(30.23, 32.6, 36.08)

# Create a data frame for each age group
screentime_1_6 <- data.frame(
  year = years,
  duration = age_1_6,
  age_group = rep("1-6 years", length(years))
)

screentime_7_12 <- data.frame(
  year = years,
  duration = age_7_12,
  age_group = rep("7-12 years", length(years))
)

screentime_13_19 <- data.frame(
  year = years,
  duration = age_13_19,
  age_group = rep("13-19 years", length(years))
)

# Combine the data frames into one
screentime <- rbind(screentime_1_6, screentime_7_12, screentime_13_19)

# Create the plot using ggplot2
p <- ggplot(screentime, aes(x = year, y = duration, color = age_group, group = age_group)) +
  geom_line(size = 1) +
  geom_point(size = 2) +
  annotate("text", x = screentime$year, y = screentime$duration + 0.5, 
           label = paste(round(screentime$duration, 2), "h"), 
           vjust = -0.5, size = 3, color = "black") +  # Add annotations just above the points
  scale_color_manual(values = c("1-6 years" = "#FDBF6F", "7-12 years" = "#B2DF8A", "13-19 years" = "#A6CEE3")) +  
  scale_x_continuous(breaks = seq(2012, 2022, by = 1)) +
  scale_y_continuous(breaks = seq(0, max(screentime$duration), by = 5)) +  # Set y-axis labels every 5 hours
  labs(
    title = "Evolution of weekly screen time for children and adolescents in France",
    x = "Years",
    y = "Duration in hours",
    color = "Age groups"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

print(p)  # Display the plot

Source : https://fr.statista.com/statistiques/1414345/duree-utilisation-ecran-france-jeunes/

Day 12 : Reuters Graphic

Show the code
# Load necessary libraries
library(rworldmap)
library(dplyr)
# Create data frame
data <- data.frame(
country = c("Algeria", "Angola", "Benin", "Botswana", "Burkina Faso", "Burundi",
"Cabo Verde", "Cameroon", "Central African Republic", "Chad", "Comoros",
"Côte d'Ivoire", "Democratic Republic of the Congo", "Djibouti", "Egypt",
"Equatorial Guinea", "Eritrea", "Eswatini", "Ethiopia", "Gabon", "Gambia",
"Ghana", "Guinea", "Guinea-Bissau", "Kenya", "Lesotho", "Liberia", "Libya",
"Madagascar", "Malawi", "Mali", "Mauritania", "Mauritius", "Morocco",
"Mozambique", "Namibia", "Niger", "Nigeria", "Rwanda", "São Tomé and Príncipe",
"Senegal", "Seychelles", "Sierra Leone", "Somalia", "South Africa",
"South Sudan", "Sudan", "Tanzania", "Togo", "Tunisia", "Uganda", "Zambia",
"Zimbabwe"),
criminalisation = c("De facto criminalisation", "No criminalisation", "No criminalisation",
"No criminalisation", "No criminalisation", "Up to 8 years imprisonment",
"No criminalisation", "Up to 8 years imprisonment", "Up to 8 years imprisonment",
"Up to 8 years imprisonment", "Up to 8 years imprisonment", "No criminalisation",
"No criminalisation", "No criminalisation", "De facto criminalisation",
"No criminalisation", "Up to 8 years imprisonment", "No criminalisation",
"Up to 8 years imprisonment", "No criminalisation", "10 years to life in prison",
"Up to 8 years imprisonment", "10 years to life in prison", "No criminalisation",
"Up to 8 years imprisonment", "No criminalisation", "Up to 8 years imprisonment",
"De facto criminalisation", "No criminalisation", "Up to 8 years imprisonment",
"No criminalisation", "Death Penalty", "Up to 8 years imprisonment",
"Up to 8 years imprisonment", "No criminalisation", "No criminalisation",
"No criminalisation", "Death Penalty (possible)", "No criminalisation",
"No criminalisation", "10 years to life in prison", "No criminalisation",
"Up to 8 years imprisonment", "Death Penalty", "No criminalisation",
"De facto criminalisation", "Death Penalty (possible)", "Up to 8 years imprisonment",
"Up to 8 years imprisonment", "Up to 8 years imprisonment",
"Death Penalty (possible)", "Up to 8 years imprisonment",
"Up to 8 years imprisonment")
)
# Convert 'criminalisation' column to factor with specified levels
data$criminalisation <- factor(data$criminalisation, levels = c("No criminalisation",
"De facto criminalisation",
"Up to 8 years imprisonment",
"10 years to life in prison",
"Death Penalty (possible)",
"Death Penalty"))
# Define a vector of colors
colour_criminalisation <- c("white", "#FFECB3", "#FFCC80", "#FF8A65", "#F4511E", "#B71C1C")
# Join the data frame with the map data
sPDF <- joinCountryData2Map(data,
joinCode = "NAME",
nameJoinColumn = "country")
Show the code
# Plot the map
mapCountryData(sPDF,
nameColumnToPlot = 'criminalisation',
colourPalette = colour_criminalisation,
catMethod = "categorical",
mapTitle = "Criminalisation of consensual same-sex sexual acts in Africa (2024)",
xlim = c(-20, 60),
ylim = c(-35, 40),
oceanCol = "#EBF5FB",
lwd = 0.5,
addLegend = FALSE)  # Disable default legend
# Manually add a smaller legend on the side
legend("bottomleft", legend = levels(data$criminalisation), fill = colour_criminalisation, bty = "n", cex = 0.8)

Inspired by :#https://www.reuters.com/graphics/UGANDA-LGBT/movakykrjva/ and data from : https://database.ilga.org/criminalisation-consensual-same-sex-sexual-acts

Day 13 : Family

Show the code
library(ggraph)
library(igraph)
first_name = c(NA , "Agnes", "Vincent", "Susan", "Henri", "Anne-Marie", "Serge", "Sonia", "Peter", "Simone", "Andre", "George", "mama Papi", "one Nanie", "two Nanie", "Lia", "Thais", "Olivier",
               "Joseph", "Sara", "Lucette", "Me", NA, NA, NA, NA, "Marie-Laure", "Antoine", "Arthur", NA, "Eric", NA, "Johnny", "Camille",
               "Albin", "Nina", "Isabelle", NA, "Laurie", "Iris", "Pierrick", NA, "Julie", "Arsene", "Robin", "?", NA, "Laura", "Helios", NA,
               "Betty", "Vasya", NA, "Gerard", "Florence", "Sandrine", NA, "Pierre", "Luna", "Nina", "Nathan", NA, "Pere vic et clo", "Victor", "Clotilde", NA, "Alain", "Bastien", "Simon", "Jeanne", "Marie", NA,
               "Rebecca", "Mael", "Soan", NA, "Aurelien", "Haldora", "Lilas", NA, "Maman Mamo", "Louis", NA, "Manou",
               "Linette", "Annie", NA, "Serge", "Fabienne", "Alain", "Jean-Yves", NA, "Olivier", "Lola", "Jeanne", NA, "Agnes", "Charlotte",
               "Elsa", NA, "Martine", "Guillaume", "Melanie", NA, "Basile", "Charlie", "Mae", NA, "Marie", "?", NA,
               "Jacques", "Cecile", "Paul", "Jacques", "Mimi (Marie-Blanche)", NA) 
nodes<-data.frame(name=1:117)
edges <- data.frame(
from = c(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 2, 5, 4, 3, 28, 29, 27, 5,
         4, 31, 33, 34, 35, 36, 37, 18, 28, 39, 40, 41, 29, 43, 44, 45, 46, 19, 48, 49, 19, 51, 52, 21, 54, 55, 56, 55, 58, 59, 60, 61, 56, 63, 64, 65,
         20, 67, 68, 69, 70, 71, 73, 74, 75, 69, 70, 77, 79, 78, 81, 82, 10, 84, 82, 86, 85, 85, 88, 89, 90, 91, 89, 93, 94, 95,
         90, 97, 98, 99, 91, 101, 102, 103, 102, 109, 110, 103, 105, 106, 107, 112, 113, 114, 115, 8, 116),
to = c(1, 1, 23, 23, 26, 26, 25, 25, 24, 24, 6, 6, 7, 7, 1, 1, 26, 23, 25, 24, 1, 23, 24, 25, 26, 30, 30, 30, 30,
       32, 32, 32, 32, 38, 38, 38, 38, 42, 42, 42, 42, 47, 47, 47, 47, 47, 50, 50, 50, 53, 53, 53, 57, 57, 57, 57, 62, 62, 62, 62, 62, 66, 66, 66, 66,
       72, 72, 72, 72, 72, 72, 76, 76, 76, 76, 80, 80, 80, 80, 83, 83, 83, 87, 87, 87, 87, 92, 92, 92, 92, 92, 96, 96, 96, 96,
       100, 100, 100, 100, 104, 104, 104, 104, 111, 111, 111, 108, 108, 108, 108, 117, 117, 117, 117, 117, 117)
)

height<-c(2, 3, 3, 5, 5, 5, 5, 7, 7, 7, 7, 7, 7, 7, 7, 1, 1, 3, 3, 5, 
          5, 1, 4, 6, 6, 4, 5, 3, 3, 4, 5, 4, 3, 3, 1, 1, 3, 2, 3, 1, 1, 2,
          3, 1, 1, 1, 2, 3, 1, 2, 3, 1, 2, 5, 3, 3, 4, 3, 1, 1, 1, 2, 3, 1, 1, 2, 5, 3, 3, 3, 3, 4, 3, 1, 1, 2, 3, 1, 1, 2,
          9, 9, 8, 9, 7, 7, 8, 7, 5, 5, 5, 6, 5, 3, 3, 4, 5, 3, 3, 4, 5, 3, 3, 4, 3, 1, 1, 2, 3, 1, 2, 9, 9, 7, 7, 7, 8)

g <- graph_from_data_frame(edges, vertices = nodes, directed = TRUE)

lext<-create_layout(g,layout='kk')


lext[,2]=height

ggraph(g, layout = lext) +
  geom_edge_link() +
  geom_node_point() +
  geom_node_label(aes(label=first_name), size = 3, alpha = 0.9, 
                  color = "#8856a7",repel=TRUE) +
  theme_void()

Show the code
library(visNetwork)
library(igraph)
# Define nodes and edges
first_name = c(NA , "Agnes", "Vincent", "Susan", "Henri", "Anne-Marie", "Serge", "Sonia", "Peter", "Simone", "Andre", "George", "mama Papi", "one Nanie", "two Nanie", "Lia", "Thais", "Olivier",
"Joseph", "Sara", "Lucette", "Me", NA, NA, NA, NA, "Marie-Laure", "Antoine", "Arthur", NA, "Eric", NA, "Johnny", "Camille",
"Albin", "Nina", "Isabelle", NA, "Laurie", "Iris", "Pierrick", NA, "Julie", "Arsene", "Robin", "?", NA, "Laura", "Helios", NA,
"Betty", "Vasya", NA, "Gerard", "Florence", "Sandrine", NA, "Pierre", "Luna", "Nina", "Nathan", NA, "Pere vic et clo", "Victor", "Clotilde", NA, "Alain", "Bastien", "Simon", "Jeanne", "Marie", NA,
"Rebecca", "Mael", "Soan", NA, "Aurelien", "Haldora", "Lilas", NA, "Maman Mamo", "Louis", NA, "Manou",
"Linette", "Annie", NA, "Serge", "Fabienne", "Alain", "Jean-Yves", NA, "Olivier", "Lola", "Jeanne", NA, "Agnes", "Charlotte",
"Elsa", NA, "Martine", "Guillaume", "Melanie", NA, "Basile", "Charlie", "Mae", NA, "Marie", "?", NA,
"Jacques", "Cecile", "Paul", "Jacques", "Mimi (Marie-Blanche)", NA)
nodes <- data.frame(id = 1:117, label = first_name)
edges <- data.frame(
from = c(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 2, 5, 4, 3, 28, 29, 27, 5,
4, 31, 33, 34, 35, 36, 37, 18, 28, 39, 40, 41, 29, 43, 44, 45, 46, 19, 48, 49, 19, 51, 52, 21, 54, 55, 56, 55, 58, 59, 60, 61, 56, 63, 64, 65,
20, 67, 68, 69, 70, 71, 73, 74, 75, 69, 70, 77, 79, 78, 81, 82, 10, 84, 82, 86, 85, 85, 88, 89, 90, 91, 89, 93, 94, 95,
90, 97, 98, 99, 91, 101, 102, 103, 102, 109, 110, 103, 105, 106, 107, 112, 113, 114, 115, 8, 116),
to = c(1, 1, 23, 23, 26, 26, 25, 25, 24, 24, 6, 6, 7, 7, 1, 1, 26, 23, 25, 24, 1, 23, 24, 25, 26, 30, 30, 30, 30,
32, 32, 32, 32, 38, 38, 38, 38, 42, 42, 42, 42, 47, 47, 47, 47, 47, 50, 50, 50, 53, 53, 53, 57, 57, 57, 57, 62, 62, 62, 62, 62, 66, 66, 66, 66,
72, 72, 72, 72, 72, 72, 76, 76, 76, 76, 80, 80, 80, 80, 83, 83, 83, 87, 87, 87, 87, 92, 92, 92, 92, 92, 96, 96, 96, 96,
100, 100, 100, 100, 104, 104, 104, 104, 111, 111, 111, 108, 108, 108, 108, 117, 117, 117, 117, 117, 117)
)
# Create the interactive network graph
visNetwork(nodes, edges) %>%
visOptions(highlightNearest = TRUE, nodesIdSelection = TRUE) %>%
visPhysics(stabilization = FALSE)

Day 14 : Heatmap

Show the code
library(geosphere)
library(ggplot2)
library(reshape2)

# Define the coordinates (latitude and longitude) for each capital
capitals <- data.frame(
city = c("London", "Paris", "Berlin", "Madrid", "Rome", "Vienna", "Amsterdam",
"Brussels", "Warsaw", "Prague", "Lisbon", "Athens", "Budapest",
"Stockholm", "Copenhagen", "Dublin"),
lat = c(51.5074, 48.8566, 52.5200, 40.4168, 41.9028, 48.2082, 52.3676,
50.8503, 52.2297, 50.0755, 38.7167, 37.9838, 47.4979, 59.3293,
55.6761, 53.3498),
lon = c(-0.1278, 2.3522, 13.4050, -3.7038, 12.4964, 16.3738, 4.9041,
4.3517, 21.0122, 14.4378, -9.1393, 23.7275, 19.0402, 18.0686,
12.5683, -6.2603)
)

# Calculate the distance matrix
distance_matrix <- distm(capitals[, c("lon", "lat")], fun = distHaversine) / 1000  # Convert to kilometers

# Convert distance matrix to a data frame for plotting
distance_df <- as.data.frame(as.table(as.matrix(distance_matrix)))
names(distance_df) <- c("Capital1", "Capital2", "Distance")
distance_df$Capital1 <- factor(distance_df$Capital1, labels=capitals$city)
distance_df$Capital2 <- factor(distance_df$Capital2, labels=capitals$city)

# Plot the heatmap
heatmap = ggplot(distance_df, aes(Capital1, Capital2, fill = Distance)) +
geom_tile(color = "black") +
scale_fill_gradientn(colors = c("blue", "white", "yellow", "orange", "red")) +
theme_minimal() +
labs(title = "Distance Between Major European Capitals (in km)",
x = "", y = "", fill = "Distance (km)") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
heatmap

Adapting graphs for colour blinds

Show the code
library(geosphere)
library(ggplot2)
library(reshape2)

# Define the coordinates (latitude and longitude) for each capital
capitals <- data.frame(
city = c("London", "Paris", "Berlin", "Madrid", "Rome", "Vienna", "Amsterdam",
"Brussels", "Warsaw", "Prague", "Lisbon", "Athens", "Budapest",
"Stockholm", "Copenhagen", "Dublin"),
lat = c(51.5074, 48.8566, 52.5200, 40.4168, 41.9028, 48.2082, 52.3676,
50.8503, 52.2297, 50.0755, 38.7167, 37.9838, 47.4979, 59.3293,
55.6761, 53.3498),
lon = c(-0.1278, 2.3522, 13.4050, -3.7038, 12.4964, 16.3738, 4.9041,
4.3517, 21.0122, 14.4378, -9.1393, 23.7275, 19.0402, 18.0686,
12.5683, -6.2603)
)

# Calculate the distance matrix
distance_matrix <- distm(capitals[, c("lon", "lat")], fun = distHaversine) / 1000  # Convert to kilometers

# Convert distance matrix to a data frame for plotting
distance_df <- as.data.frame(as.table(as.matrix(distance_matrix)))
names(distance_df) <- c("Capital1", "Capital2", "Distance")
distance_df$Capital1 <- factor(distance_df$Capital1, labels=capitals$city)
distance_df$Capital2 <- factor(distance_df$Capital2, labels=capitals$city)

# Plot the heatmap
heatmap_viridis = ggplot(distance_df, aes(Capital1, Capital2, fill = Distance)) +
geom_tile(color = "black") +
scale_fill_viridis() +
theme_minimal() +
labs(title = "Distance Between Major European Capitals (in km)",
x = "", y = "", fill = "Distance (km)") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
heatmap_viridis

Day 15 : Historical

Aviation data from 1990 : https://ourworldindata.org/global-aviation-emissions

Show the code
# Load required libraries
library(ggplot2)
library(readr)
library(patchwork)

# Read the data
aviation = read.csv("../Rwork/30DayChartChallenge/aviation.csv")

# Line plot for Passenger Demand
a = ggplot(aviation, aes(x = Year, y = `Passenger.demand`, group = 1)) +
  geom_line() +
  labs(title = "Passenger Demand Over Time", x = "Year", y = "Passenger Demand") +
  theme_minimal()

# Bar plot of Freight Demand Over Time
b = ggplot(aviation, aes(x = Year, y = `Freight.demand`)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  labs(title = "Freight Demand Over Time (million-ton km)", x = "Year", y = "Freight Demand") +
  theme_minimal()

#Scatter Plot of Carbon Intensity vs. Year
c <- ggplot(aviation, aes(x = Year, y = `Carbon.intensity`)) +
  geom_point(aes(color = `Carbon.per.unit.energy`), size = 2) +
  labs(title = "Scatter Plot of Carbon Intensity vs. Year", x = "Year", y = "Carbon Intensity (gCO₂ per passenger-km)") +
  scale_color_gradient(low = "lightblue", high = "darkblue") + 
  theme_minimal()

# Line plot for CO2 Emissions
d = ggplot(aviation, aes(x = Year, y = `CO2.emissions`)) +
  geom_line(color = "black") +
  labs(title = "CO2 Emissions Over Time", x = "Year", y = "CO2 Emissions") +
  theme_minimal()

a + b + c + d

Day 16 : Weather

Two ways of viewing the same data :

Show the code
library(readr)
library(ggplot2)
library(patchwork)

disaster = read_csv("../Rwork/30DayChartChallenge/natural-disasters-by-type.csv")

plot_disaster <- ggplot(data = disaster, aes(x = Year, y = Disasters, group = Entity, color = Entity)) + 
  geom_line() + 
  labs(title = "Number of recorded natural disaster events, 1900 to 2024",
       x = "Year",
       y = "",
       color = "Disasters") + 
  theme_minimal() + theme(legend.position = "left")
plot_disaster1 = ggplotly(plot_disaster)

#stacked bar plot
disasterr = disaster[-(1:165),]
pastel_colors <- c("#FBB4AE", "#E5D8BD", "#CCEBC5", "#DECBE4", "#FED9A6", "#B3CDE3", "#B3E2CD", "#FDCDAC", "#CBD5E8", "#F4CAE4")

plot_disasterr <- ggplot(data = disasterr, aes(x = Year, y = Disasters, group = Entity, fill = Entity)) + 
  geom_bar(stat = "identity", position = "stack") + 
  scale_fill_manual(values = pastel_colors )
  labs(x = "Year", y = "") +
    theme_minimal()
Show the code
plot_disasterr

Show the code
plot_disaster1
Show the code
#plot_disasterr / plot_disaster+ theme(legend.position = "left") + plot_annotation(title = 'Number of recorded natural disaster events, 1900 to 2024 ')

Day 17 : Network

Network graph using the distance between cities.

Show the code
#Make the graph with k=3 nearest neighbors
library(ggraph)
library(igraph)
library(tidyverse)
library(tidygraph)

# install.packages("FastKNN")
mateuro<-as.matrix(eurodist)
n=ncol(mateuro)
library(FastKNN)
k=3
nn = matrix(0,n,k) # n x k
for (i in 1:n)
  nn[i,] = k.nearest.neighbors(i, mateuro, k = k)
nngraph<-matrix(0,ncol=n,nrow=n)
for (i in 1:n){
  nngraph[i,nn[i,]]=1
}
ig<-graph_from_adjacency_matrix(nngraph,mode = "undirected")
V(ig)$name<-rownames(mateuro)

coords<-cmdscale(eurodist,k=2)
layg<-graph_from_adjacency_matrix(mateuro,weighted=TRUE)


graph1 <- as_tbl_graph(ig)

ggraph(graph1,layout=coords) +
  geom_edge_link(aes(colour = "red")) +
  geom_node_point()+
  geom_node_label(aes(label=rownames(mateuro)),repel=TRUE) +
  labs(title = "Graph of the three nearest neighbours") +
  theme_void() + 
  theme(legend.position = "none") 

Day 18 : Asian Develmt Bank

Show the code
# Load libraries
library(ggplot2)

# Prepare the data
aus<-readRDS("../Rwork/30DayChartChallenge/Aussie2.rds")
aust=t(aus)
aust=as.data.frame(aust)

#Create the plot 
Cloth_Alcohol_correlation = ggplot(aust,aes(x=Alcohol_Tobacco_drugs,y=Clothing_footwear))+
  geom_point(aes(col=Year), size = 2) +
  scale_colour_gradient(low = "lightblue", high = "darkgreen") +
  labs(title = "Price of clothing and alcohol in Australia", x = "Alcohol, Tabacco and drugs", y = "Clothing and footwear") +
  geom_label(aes(x = 115, y = 102, label = "Negative correlation = -0.84"), color = "black", fill = "white", size = 3.5, hjust = 0) +
  theme_bw()

Cloth_Alcohol_correlation

From the ADB Bank : https://data.adb.org/dataset/australia-key-indicators

Day 19 : Dinosaurs

Reference to Anscombe’s quartet that shows the importance of looking at the scatter plot graph, and not only the statistics like correlation or mean !

Show the code
# Load the libraries
library(datasauRus)
library(ggplot2)
library(dplyr)

# Define color palette
colors <- c("dino" = "#FF9999", "bullseye" = "#FFCC99", "star" = "#FFFF99", "slant_up" = "#99FF99")

# Filter the datasets from the datasaurus_dozen
datasets <- datasaurus_dozen %>%
  filter(dataset %in% c("dino", "bullseye", "star", "slant_up"))

# Create the plots
ggplot(datasets, aes(x = x, y = y, color = dataset)) +
  geom_point() +
  scale_color_manual(values = colors) +
  facet_wrap(~ dataset) +
  theme_minimal() +
  labs(title = "Datasaurus Dozen",
       x = "X",
       y = "Y") +
  theme(legend.position = "none", plot.title = element_text(hjust = 0.5))

Day 20 : Correlation

Zooming in the correlation of Data from ABD Day 18 in Australia:

Show the code
library(pheatmap)
library(ggplot2)

#Create data frame out of the ADB data of Australia already used before
australia = aust[,c(1,4,20,32,33,29,30,36,37,38,41,45)]

# You can already see the correlation matrix with round(cor(australia),2)

# 3 different heatmaps
Aus1 = pheatmap(cor(australia))

Show the code
aus2<-australia[,-c(5,10)]
Aus2 = pheatmap(cor(aus2))

Show the code
aus3<-australia[,-c(5,6,10,12,11,9)]
Aus3 = pheatmap(cor(aus3))

Show the code
# Observation : We see that the biggest correlation is between the money invested in Health and the total population which is normal because the more there is humans the more we need money in Health